knitr::opts_chunk$set(eval = FALSE, warning = FALSE, message = FALSE)
oldpath <- .libPaths()[1]
newpath <- "/home/biostacs/R/backup/env_31723062"
library(ggplot2)
library(Seurat, lib.loc = newpath)
导入第二部分的结果
if (!exists("seu.CCA")) {
seu.CCA <- readRDS("tmp/s2-seu_CCA.rds")
}
seu.CCA <- RunTSNE(seu.CCA, reduction.use = "cca.aligned", dims.use = 1:20)
查看tSNE降维图,检查批次效应矫正的效果
tsne.emb <- GetDimReduction(seu.CCA, reduction.type = "tsne", slot = "cell.embeddings")
tsne.emb <- as.data.frame(tsne.emb)
tsne.emb$sample_id <- as.character(seu.CCA@meta.data[rownames(tsne.emb),]$sample_id)
ggplot(data = tsne.emb, aes(x=tSNE_1, y=tSNE_2, color=sample_id)) +
geom_point(size = .1) +
facet_wrap(~sample_id, ncol = 3) +
theme_bw() +
theme(legend.position = "none",
strip.text = element_text(size = 15, face = "bold"))
Clusters were identifed using the function “FindClusters” from Seurat using default parameters.
聚类
seu.CCA <- FindClusters(seu.CCA, reduction.type = "cca.aligned", dims.use = 1:20, save.SNN = TRUE)
对聚类结果进行评价
The robustness of the clusters was calculated using the function “AssessNodes” from Seurat. For each cluster, a phylogenetic tree based on the distance matrix in gene expression space was computed. Next, Seurat computed an out-of-bag error for a random forest classifer trained on each internal node split of the tree.
构建phylogenetic tree
seu.CCA <- BuildClusterTree(seu.CCA, reorder.numeric = TRUE, do.reorder = TRUE, do.plot = FALSE)
## [1] "Finished averaging RNA for cluster 0"
## [1] "Finished averaging RNA for cluster 1"
## [1] "Finished averaging RNA for cluster 2"
## [1] "Finished averaging RNA for cluster 3"
## [1] "Finished averaging RNA for cluster 4"
## [1] "Finished averaging RNA for cluster 5"
## [1] "Finished averaging RNA for cluster 6"
## [1] "Finished averaging RNA for cluster 7"
## [1] "Finished averaging RNA for cluster 8"
## [1] "Finished averaging RNA for cluster 9"
## [1] "Finished averaging RNA for cluster 10"
## [1] "Finished averaging RNA for cluster 11"
## [1] "Finished averaging RNA for cluster 12"
## [1] "Finished averaging RNA for cluster 13"
## [1] "Finished averaging RNA for cluster 14"
## [1] "Finished averaging RNA for cluster 15"
## [1] "Finished averaging RNA for cluster 16"
## [1] "Finished averaging RNA for cluster 17"
## [1] "Finished averaging RNA for cluster 18"
## [1] "Finished averaging RNA for cluster 1"
## [1] "Finished averaging RNA for cluster 2"
## [1] "Finished averaging RNA for cluster 3"
## [1] "Finished averaging RNA for cluster 4"
## [1] "Finished averaging RNA for cluster 5"
## [1] "Finished averaging RNA for cluster 6"
## [1] "Finished averaging RNA for cluster 7"
## [1] "Finished averaging RNA for cluster 8"
## [1] "Finished averaging RNA for cluster 9"
## [1] "Finished averaging RNA for cluster 10"
## [1] "Finished averaging RNA for cluster 11"
## [1] "Finished averaging RNA for cluster 12"
## [1] "Finished averaging RNA for cluster 13"
## [1] "Finished averaging RNA for cluster 14"
## [1] "Finished averaging RNA for cluster 15"
## [1] "Finished averaging RNA for cluster 16"
## [1] "Finished averaging RNA for cluster 17"
## [1] "Finished averaging RNA for cluster 18"
## [1] "Finished averaging RNA for cluster 19"
计算out-of-bag error,文章认为oobe<0.1的节点分类是可靠的。
oob <- AssessNodes(seu.CCA)
oob
## node oobe
## 1 20 0.026977975
## 2 21 0.017106612
## 3 24 0.038723404
## 4 29 0.003200190
## 5 34 0.012274645
## 6 37 0.137931034
## 7 35 0.014610390
## 8 30 0.022845275
## 9 25 0.008376289
## 10 26 0.091981700
## 11 27 0.008272506
## 12 31 0.002969121
## 13 36 0.016689847
## 14 22 0.008639053
## 15 23 0.002000762
## 16 28 0.013710618
## 17 32 0.098403921
## 18 33 0.026643747
可以看到节点37的oobe=0.138>0.1,我们可以将节点37的两个分类合并,从phylogenetic tree上看,就是cluster2和cluster3。
PlotClusterTree(seu.CCA)
tsne.emb$ident <- factor(as.character(seu.CCA@ident), levels = levels(seu.CCA@ident))
ggplot(data = tsne.emb, aes(x=tSNE_1, y=tSNE_2, color=ident)) +
geom_point(size = .1) +
facet_wrap(~sample_id, ncol = 3) +
guides(colour = guide_legend(title = "Cluster",
title.hjust=0.5,
keyheight = 1.5,
override.aes = list(size=6))) +
theme_bw() +
theme(strip.text = element_text(size = 15, face = "bold"))
查看cluster2和cluster3
tsne.emb$is_2_3 <- tsne.emb$ident %in% c(2,3)
ggplot(data = tsne.emb, aes(x=tSNE_1, y=tSNE_2, color=is_2_3)) +
geom_point(size = .1) +
scale_color_manual(values = c("grey","red")) +
guides(colour = guide_legend(title = "Cluster 2&3",
title.hjust=0.5,
keyheight = 1.5,
override.aes = list(size=6))) +
theme_bw() +
theme(strip.text = element_text(size = 15, face = "bold"))
合并节点37,重新构建phylogenetic tree
seu.CCA <- MergeNode(seu.CCA, node.use = 37, rebuild.tree = TRUE,
reorder.numeric = TRUE, do.reorder = TRUE, do.plot = FALSE)
## [1] "Finished averaging RNA for cluster 1"
## [1] "Finished averaging RNA for cluster 2"
## [1] "Finished averaging RNA for cluster 4"
## [1] "Finished averaging RNA for cluster 5"
## [1] "Finished averaging RNA for cluster 6"
## [1] "Finished averaging RNA for cluster 7"
## [1] "Finished averaging RNA for cluster 8"
## [1] "Finished averaging RNA for cluster 9"
## [1] "Finished averaging RNA for cluster 10"
## [1] "Finished averaging RNA for cluster 11"
## [1] "Finished averaging RNA for cluster 12"
## [1] "Finished averaging RNA for cluster 13"
## [1] "Finished averaging RNA for cluster 14"
## [1] "Finished averaging RNA for cluster 15"
## [1] "Finished averaging RNA for cluster 16"
## [1] "Finished averaging RNA for cluster 17"
## [1] "Finished averaging RNA for cluster 18"
## [1] "Finished averaging RNA for cluster 19"
## [1] "Finished averaging RNA for cluster 1"
## [1] "Finished averaging RNA for cluster 2"
## [1] "Finished averaging RNA for cluster 3"
## [1] "Finished averaging RNA for cluster 4"
## [1] "Finished averaging RNA for cluster 5"
## [1] "Finished averaging RNA for cluster 6"
## [1] "Finished averaging RNA for cluster 7"
## [1] "Finished averaging RNA for cluster 8"
## [1] "Finished averaging RNA for cluster 9"
## [1] "Finished averaging RNA for cluster 10"
## [1] "Finished averaging RNA for cluster 11"
## [1] "Finished averaging RNA for cluster 12"
## [1] "Finished averaging RNA for cluster 13"
## [1] "Finished averaging RNA for cluster 14"
## [1] "Finished averaging RNA for cluster 15"
## [1] "Finished averaging RNA for cluster 16"
## [1] "Finished averaging RNA for cluster 17"
## [1] "Finished averaging RNA for cluster 18"
plotClusterTree(seu.CCA)
重新计算out-of-bag error
oob <- AssessNodes(seu.CCA)
oob
## node oobe
## 1 19 0.027900824
## 2 20 0.030945669
## 3 23 0.004433186
## 4 30 0.014610390
## 5 31 0.022845275
## 6 24 0.012474161
## 7 25 0.002969121
## 8 35 0.016689847
## 9 26 0.004050223
## 10 28 0.091981700
## 11 29 0.008422852
## 12 34 0.010484593
## 13 21 0.008639053
## 14 22 0.002000762
## 15 27 0.013710618
## 16 32 0.098403921
## 17 33 0.026643747
这下所有节点的oobe<0.1,认为此时的聚类是可靠的(我们得到了18个cluster,文章中给出了16个cluster)
tsne.emb$ident <- factor(as.character(seu.CCA@ident), levels = levels(seu.CCA@ident))
ggplot(data = tsne.emb, aes(x=tSNE_1, y=tSNE_2, color=ident)) +
geom_point(size = .1) +
facet_wrap(~sample_id, ncol = 3) +
guides(colour = guide_legend(title = "Cluster",
title.hjust=0.5,
keyheight = 1.5,
override.aes = list(size=6))) +
theme_bw() +
theme(strip.text = element_text(size = 15, face = "bold"))
保存计算结果
saveRDS(oob, "tmp/s3.1-clustering_robustness.rds")
saveRDS(seu.CCA, "tmp/s3.2-seu_CCA_clustering.rds")